--- title: "Multiple Regression" author: "Oliver" date: '' output: word_document: default html_document: default --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) library(pacman) ``` ## Get Some Data We obtain the Student Survey and Golden State Warrior data from the web. These files are part of the Lock^5 2nd Ed. data files. ```{r} Survey = read.csv("http://facweb1.redlands.edu/fac/jim_bentley/Data/Lock5Ed2/Lock5Data2eCSV/StudentSurvey.csv") names(Survey) GSW = read.csv("http://facweb1.redlands.edu/fac/jim_bentley/Data/Lock5Ed2/Lock5Data2eCSV/GSWarriors.csv") names(GSW) ``` ## Fit GPA Suppose we want to figure out predictors of GPA. For college students from St. Lawrence University, what helps determine student success? ```{r} ### Plot all of the variables against each other pairs(Survey[,c("GPA","Height","TV","Piercings","MathSAT","VerbalSAT")]) ### Load the lattice graphics package p_load(lattice) ### Plot GPA as a function of a couple of variables. Include the regression line. p1 = xyplot(GPA ~ Height, data=Survey, type=c("p","r"), aspect=1) p2 = xyplot(GPA ~ MathSAT, data=Survey, type=c("p","r"), aspect=1) ### A boxplot works well for categorical/factor variables p3 =bwplot(Gender~GPA, data=Survey, aspect=1) ### Plot the graphics in a sinle image to save space print(p1, split = c(1, 1, 3, 1), more = TRUE) print(p2, split = c(2, 1, 3, 1), more = TRUE) print(p3, split = c(3, 1, 3, 1), more = FALSE) ### Fit a multiple linear regression model GPA.lm = lm(GPA ~ Height + TV + Piercings + MathSAT + VerbalSAT, data=Survey) ### Get the parameter estimates, standard errors, t-stats, and p-vals summary(GPA.lm) ### Fit a reduced model GPA.lm = lm(GPA ~ Height + MathSAT + VerbalSAT, data=Survey) ### Get the parameter estimates, standard errors, t-stats, and p-vals summary(GPA.lm) ### Check residuals p1 = xyplot(residuals(GPA.lm)~predict(GPA.lm)) p2 = xyplot(residuals(GPA.lm)~Survey$Height) p3 = xyplot(residuals(GPA.lm)~Survey$MathSAT) p4 = xyplot(residuals(GPA.lm)~Survey$MathSAT) ### Plot using split to get four plots in a single image print(p1, split = c(1, 1, 2, 2), more = TRUE) print(p2, split = c(2, 1, 2, 2), more = TRUE) print(p3, split = c(1, 2, 2, 2), more = TRUE) print(p4, split = c(2, 2, 2, 2), more = FALSE) # more = FALSE is redundant ### Plot the default residual plots in a single image par(mfrow=c(2,2)) plot(GPA.lm) par(mfrow=c(1,1)) ### For fun, plot the plane of estimates determined by Math and Verbal SATs p_load(scatterplot3d) s3d = scatterplot3d(Survey$VerbalSAT, Survey$MathSAT, Survey$GPA, xlab="Verbal SAT", ylab="Math SAT", zlab="GPA", angle=65) s3d$plane3d(lm(GPA ~ MathSAT + VerbalSAT, data=Survey)) par(mfrow=c(1,1)) ``` ## Fit Points We can estimate the number of points scored by the Golden State Warriors (2015-2016 season) using a multiple linear regression model. ```{r} ### Plot all of the variables against each other pairs(GSW[,c("Points","Rebounds","Steals","Blocks","Assists")]) ### Load the lattice graphics package p_load(lattice) ### Plot Points as a function of a couple of variables. Include the regression line. Use split to get them in a single graphic. p1 = xyplot(Points ~ Rebounds, data=GSW, type=c("p","r"), aspect=1) p2 = xyplot(Points ~ Assists, data=GSW, type=c("p","r"), aspect=1) print(p1, split = c(1, 1, 2, 1), more = TRUE) print(p2, split = c(2, 1, 2, 1), more = FALSE) ### Fit a multiple linear regression model GSW.lm = lm(Points ~ Rebounds + Steals + Blocks + Assists, data=GSW) ### Get the parameter estimates, standard errors, t-stats, and p-vals summary(GSW.lm) ### Try a reduced model GSW.lm = lm(Points~Steals + Assists, data=GSW) summary(GSW.lm) ### Check residuals p1 = xyplot(residuals(GSW.lm)~predict(GSW.lm)) p2 = xyplot(residuals(GSW.lm)~GSW$Assists) p3 = xyplot(residuals(GSW.lm)~GSW$Steals) ### Plot lattice plots in single graphic image print(p1, split = c(1, 1, 2, 2), more = TRUE) print(p2, split = c(2, 1, 2, 2), more = TRUE) print(p3, split = c(1, 2, 2, 2), more = FALSE) ### Use base plot to get default residual plots in a single graphic par(mfrow=c(2,2)) plot(GSW.lm) par(mfrow=c(1,1)) ### For fun, plot the plane of estimates determined by Rebounds and Assists p_load(scatterplot3d) s3d = scatterplot3d(GSW$Steals, GSW$Assists, GSW$Points, xlab="Steals", ylab="Assists", zlab="Points", angle=60) s3d$plane3d(lm(Points ~ Steals + Assists, data=GSW)) ```